import pandas as pd
import plotly.express as px
# Load enriched data
df = pd.read_csv('Dataset/direct_mail_campaigns_enriched.csv')
print(df.head())
Campaign_ID brand Campaign_Type Target_Audience Duration \ 0 1 Innovate Industries Email Men 18-24 30 days 1 2 NexGen Systems Email Women 35-44 60 days 2 3 Alpha Innovations Influencer Men 25-34 30 days 3 4 DataTech Solutions Display All Ages 60 days 4 5 NexGen Systems Email Men 25-34 15 days Channel_Used Location Conversion_Rate Acquisition_Cost ROI \ 0 Google Ads Chicago 0.04 $16,174.00 6.29 1 Google Ads New York 0.12 $11,566.00 5.61 2 YouTube Los Angeles 0.07 $10,200.00 7.18 3 YouTube Miami 0.11 $12,724.00 5.55 4 YouTube Los Angeles 0.05 $16,452.00 6.50 audience_size_mailed total_cost responses revenue cpa roas \ 0 329582 214228.30 10546.624 1561724.307 20.312500 7.29 1 481416 312920.40 46215.936 2068403.844 6.770833 6.61 2 431181 280267.65 24146.136 2292589.377 11.607143 8.18 3 111938 72759.70 9850.544 476576.035 7.386364 6.55 4 262805 170823.25 10512.200 1281174.375 16.250000 7.50 response_rate 0 0.032 1 0.096 2 0.056 3 0.088 4 0.040
# CPA/ROAS by Channel
channel_summary = df.groupby('Channel_Used')[['cpa', 'roas']].mean().reset_index()
print("Channel Summary:")
print(channel_summary)
# CPA/ROAS by Location
location_summary = df.groupby('Location')[['cpa', 'roas']].mean().reset_index()
print("\nLocation Summary:")
print(location_summary.head())
Channel Summary:
Channel_Used cpa roas
0 Email 16.029992 5.996487
1 Facebook 16.021051 6.018699
2 Google Ads 16.211453 6.003141
3 Instagram 16.175276 5.988706
4 Website 16.079477 6.014167
5 YouTube 16.234669 5.993754
Location Summary:
Location cpa roas
0 Chicago 16.161203 6.001590
1 Houston 16.094832 6.007203
2 Los Angeles 16.159589 6.010876
3 Miami 16.128510 6.012282
4 New York 16.083450 5.980228
# Bucket Locations by CPA percentile
# We calculate average CPA per location to classify the location itself
location_perf = df.groupby('Location')['cpa'].mean().reset_index()
cpa_33 = location_perf['cpa'].quantile(0.33)
cpa_66 = location_perf['cpa'].quantile(0.66)
def get_tier(cpa):
if cpa <= cpa_33: return 'Top' # Lower CPA is better
elif cpa <= cpa_66: return 'Middle'
else: return 'Lower'
location_perf['Market_Tier'] = location_perf['cpa'].apply(get_tier)
# Merge back to main df so each campaign has its location's tier
df = df.merge(location_perf[['Location', 'Market_Tier']], on='Location')
print(df[['Location', 'Market_Tier']].drop_duplicates().head())
Location Market_Tier 0 Chicago Lower 40010 New York Top 80034 Los Angeles Lower 119981 Miami Middle 160250 Houston Top
# 1. Bar: Avg CPA by Channel_Used
fig1 = px.bar(channel_summary, x='Channel_Used', y='cpa', title='Average CPA by Channel')
fig1.show()
# 2. Bar: Avg ROAS by Market Tier
tier_summary = df.groupby('Market_Tier')['roas'].mean().reset_index()
# Order tiers logically
tier_order = {'Top': 1, 'Middle': 2, 'Lower': 3}
tier_summary['order'] = tier_summary['Market_Tier'].map(tier_order)
tier_summary = tier_summary.sort_values('order')
fig2 = px.bar(tier_summary, x='Market_Tier', y='roas', title='Average ROAS by Market Tier')
fig2.show()
# 3. Scatter: Audience Size vs CPA
fig3 = px.scatter(df, x='audience_size_mailed', y='cpa', size='revenue', color='Channel_Used',
title='Audience Size vs CPA', hover_data=['Location', 'Market_Tier'])
fig3.show()